****************************************************************************
**		Author: 	Or Tuttnauer
**		Date: 		April 2019
**		Purpose: 	Use district-level data to calculate country-level variables
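**		Input:		KHT_basicdata.dta, KHT_controls.dta,
**					KHT_elecresults_lowerbound.dta, KHT_elecresults_upperbound.dta
**		Output: 	KHT_countrylevel.dta, KHT_districtlevel.dta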
*****************************************************************************

* Session setup: open the log, switch off output paging, move to the
* replication folder and load the district-level source data.

* Close any log left open by an earlier (possibly aborted) run; without this
* -log using- stops with "log file already open". -capture- turns the command
* into a no-op when no log is open.
capture log close

* NOTE: the log is opened before -cd-, so it is written to whatever the
* working directory is when the do-file starts, not to the replication folder.
log using "KHT_basiccode.log", replace

set more off
cd "/Volumes/GoogleDrive/My Drive/enp replication/KHT replication package/"

use "KHT_basicdata.dta", clear


******************************
*** 1. DM measures ***********
******************************

* Country-year summary statistics of district magnitude (dm).
* Each district row receives the statistic of its own cyear group.
sort cyear

* egen function paired with the prefix of the variable it creates:
*   mean -> avgdm, median -> meddm, min -> mindm,
*   max  -> maxdm, sd     -> sddm,  skew -> skewdm
local fcns  mean median min max sd skew
local stubs avg  med    min max sd skew
forvalues k = 1/6 {
	local fcn  : word `k' of `fcns'
	local stub : word `k' of `stubs'
	by cyear: egen `stub'dm = `fcn'(dm)
}

** Share of legislators elected in the top quartile of districts by DM **
* Rank the districts of each country-year from smallest to largest dm.
sort cyear dm, stable
by cyear: gen disid = _n
* Number of districts in the country-year (equals the highest rank).
by cyear: gen ndis = _N
* Rank that marks the 75% cut-off, rounded up.
gen ndis3quart = ceil(0.75*ndis)

* Sum dm over the districts ranked above the cut-off. The total is first
* defined only on those rows and is then copied to every row of the group.
tempvar grpval
egen t25leg = total(dm) if disid > ndis3quart, by(cyear)
egen `grpval' = max(t25leg), by(cyear)
replace t25leg = `grpval'
drop `grpval'

* Express the top-quartile seats as a share of tsc.
gen tqleg = t25leg/tsc

** Seat ratio **
* sr = seats (sum of dm) in the upper half of districts ranked by dm, divided
* by seats in the lower half. Relies on disid (within-cyear rank by dm), ndis
* (number of districts) and meddm (median dm) created earlier in this file.
*
* Even number of districts: the lower half is ranks 1..ndis/2, the upper half
* is the rest.
* Odd number of districts: the middle-ranked district (whose dm equals the
* median, meddm) is split equally, so meddm/2 is added to each half.
*
* NOTE(review): with a single district (ndis==1) no row satisfies
* disid<rndismed or disid>rndismed, so bmedleg, tmedleg and sr end up missing
* for that country-year -- confirm this is intended.

*  legislators of bottom 50 percentile DM *
sort cyear dm, stable
* ndismed is the exact half of the district count; rndismed rounds it up.
* rndismed==ndismed therefore identifies an even count, != an odd count.
by cyear: gen ndismed=ndis*.5
gen rndismed=ceil(ndismed)

* Even case: total over the lower-half rows (missing on all other rows).
by cyear: egen bmedleg=total(dm) if disid<=rndismed & rndismed==ndismed
* Odd case: total over the rows strictly below the middle rank.
by cyear: egen bmedleg_odd=total(dm) if disid<rndismed & rndismed!=ndismed
* Odd case: add half of the middle district. In the even case bmedleg_odd is
* missing, so rows outside the lower half simply stay missing here.
replace bmedleg=bmedleg_odd+meddm/2 if bmedleg==.
* Copy the group value to every row of the country-year.
by cyear: egen temp=max(bmedleg)
replace bmedleg=temp

drop temp bmedleg_odd

*  legislators of top 50 percentile DM *
sort cyear dm, stable

* Total over the rows ranked above the (rounded-up) middle rank.
by cyear: egen tmedleg=total(dm) if disid>rndismed
* Odd case: add the other half of the middle district.
replace tmedleg=tmedleg+meddm/2 if rndismed!=ndismed
* Copy the group value to every row of the country-year.
by cyear: egen temp=max(tmedleg)
replace tmedleg=temp
drop temp

* Upper-half seats relative to lower-half seats.
gen sr= tmedleg/bmedleg

** Natural logs of dm and of its country-year mean and median **
* Creates lndm, lnavgdm and lnmeddm, in that order.
foreach v in dm avgdm meddm {
	gen ln`v' = ln(`v')
}



**percentage legislators elected in dm<5 (acronym: psst5)**
* Seats (sum of dm) in districts with dm<5 as a share of tsc, stored on EVERY
* row of the country-year; 0 when the country-year has no such district.
*
* The total is only defined on the rows with dm<5, so the share has to be
* copied to the remaining rows of the group with max(), the same way psgt12
* is built. Without that step rows with dm>=5 were set to 0 even when small
* districts existed, and the country-level value depended on which row the
* later tag(cyear) collapse happened to retain.
by cyear: egen tdm5=total(dm) if dm<5
gen t2dm5=tdm5/tsc
by cyear: egen psst5=max(t2dm5)
* All-missing group: no district with dm<5 in this country-year.
replace psst5=0 if psst5==.
drop tdm5 t2dm5

**percentage legislators elected in dm>12 (acronym: psgt12)**
* Seats (sum of dm) in districts with dm>12 as a share of tsc, copied to every
* row of the country-year; 0 when the country-year has no such district.
egen tdm12 = total(dm) if dm > 12, by(cyear)
gen t2dm12 = tdm12/tsc
* Spread the share from the dm>12 rows to the whole group.
egen psgt12 = max(t2dm12), by(cyear)
replace psgt12 = 0 if missing(psgt12)
drop tdm12 t2dm12



***************************
*** Merge with controls ***
***************************
* Attach the country-year controls to every district row (many districts to
* one cyear). Unmatched rows from either file are retained; the _merge
* indicator is not kept.
merge m:1 cyear using "KHT_controls.dta", nogenerate

* Working copy that is reloaded after the electoral-results step.
save KHT_temp.dta, replace

******************************
*** Electoral Results data ***
******************************
* Lower-bound file: compute enpv_lower, the inverse of the summed squared
* vote shares, and save one row per cyear.

use KHT_elecresults_lowerbound.dta, clear

* Vote and seat share of each row
gen vsc = votes/tvc
label var vsc "vote share for party"
gen ssc = seats/tsc
label var ssc "seats share for party"

* Sum of squared vote shares within cyear, then its inverse
gen vsc_sq = vsc^2
egen totalvsc_sq = total(vsc_sq), by(cyear)
gen enpv_lower = 1/totalvsc_sq

* Reduce to a single row per cyear holding only the key and the result
egen country_unique = tag(cyear)
drop if country_unique != 1
keep cyear enpv_lower

save KHT_enpv_lowerbound.dta, replace

* Upper-bound file: compute enpv_upper, average it with the saved lower bound
* into enpv, compute enps from the seat shares, and save one row per cyear.
use KHT_elecresults_upperbound.dta, clear

* Vote and seat share of each row
gen vsc = votes/tvc
gen ssc = seats/tsc
label var vsc "vote share for party"
label var ssc "seats share for party"

* enpv_upper = inverse of the summed squared vote shares within cyear
gen vsc_sq=vsc^2
egen totalvsc_sq=total(vsc_sq), by (cyear)
gen enpv_upper=1/totalvsc_sq

* Bring in enpv_lower; only cyears present in both files are kept.
merge m:1 cyear using "KHT_enpv_lowerbound.dta"
drop if _merge!=3
drop _merge

* Midpoint of the two bounds. Variable names are written out in full so the
* line does not depend on variable abbreviation (set varabbrev) and cannot
* become ambiguous if another enpv_l*/enpv_u* variable is ever added.
gen enpv = (enpv_lower + enpv_upper)/2

*** calculate ENPS
* enps = inverse of the summed squared seat shares within cyear
gen ssc_sq=ssc^2
egen totalssc_sq=total(ssc_sq), by (cyear)
gen enps=1/totalssc_sq

* Reduce to a single row per cyear holding only the key and the results
egen country_unique = tag(cyear)
keep if country_unique==1
keep cyear enps enpv

save KHT_enpdata.dta, replace


* Reload the district-level working file and attach enpv/enps by cyear.
* Rows that exist only in KHT_enpdata.dta are discarded; rows without a
* match in it are kept.
use KHT_temp.dta, clear
merge m:1 cyear using KHT_enpdata.dta, keep(master match) nogenerate

***********************
** Interaction terms **
***********************
* Each term is enpv multiplied by one DM measure. The loops are split so the
* variables are created in the order
*   vlmeddm vsr vupper vlavgdm vtqleg vsddm vskewdm vpsgt12 vpsst5
* NOTE(review): -upper- is not created in this file; presumably it comes from
* one of the loaded datasets -- confirm.
gen vlmeddm = enpv*lnmeddm
foreach x in sr upper {
	gen v`x' = enpv*`x'
}
gen vlavgdm = enpv*lnavgdm
foreach x in tqleg sddm skewdm psgt12 psst5 {
	gen v`x' = enpv*`x'
}

save KHT_temp.dta, replace

* District-level export: keep the identifiers and DM measures listed below.
* NOTE(review): -dpr- is not created in this file; presumably it comes from
* one of the loaded datasets -- confirm.
keep cyear dm tsc district_c avgdm meddm sr dpr

save KHT_districtlevel.dta, replace

** Collapse data **
* Reload the full working file and keep one row per cyear. Which district row
* is retained is not controlled here, so only variables that are constant
* within cyear are meaningful in the country-level file.
use KHT_temp.dta, clear

egen country_unique = tag(cyear)
keep if country_unique==1

* Remove district-level variables and construction helpers. ndis3quart is
* written out in full (it was abbreviated to ndis3) so the command does not
* depend on variable abbreviation being enabled.
* NOTE(review): lndm is district-specific but is not dropped here -- confirm
* whether it should remain in the country-level file.
drop    dm country_unique disid ndis3quart t25leg ///
		ndismed rndismed bmedleg tmedleg  ///
		district_c


***** save new data file ***********
save "KHT_countrylevel.dta", replace

log close
